###############################################################################
## AUTHOR: ALAN YAN
## DATE LAST UPDATED: 07/17/2020
## PURPOSE: HET EFFECTS VIA RANDOM FORESTS (OUTCOME == WORK)
###############################################################################
rm(list = ls())

#### NOTES ####

#(1) create a tidier function for random forest output

#### PACKAGES ####
library(pacman)
p_load(tidyverse,
       estimatr,
       broom,
       haven,
       hrbrthemes,
       cjoint,
       formula.tools, 
       DeclareDesign,
       emmeans,
       lmtest,
       cregg,
       grf)
source("01-yougov-conjoint/03-src/forest-functions.R")

#### FUNCTIONS ####

# ggplot theme
theme_shom_alt <- function (base_size = 11, waffle = FALSE) 
{
  ret <- theme_minimal(base_size = base_size, base_family = "Roboto Condensed") + 
    theme(plot.background = element_rect(fill = "#f5f5f2", 
                                         color = NA), 
          panel.background = element_rect(fill = "#f5f5f2",color = NA), 
          legend.background = element_rect(fill = "#f5f5f2",color = NA))
  if (waffle) {
    ret + theme(axis.text = element_blank())
  }
  else {
    ret
  }
}

# lm robust function that includes omitted categories for plotting
lm_robust_omitted <- function(formula, data, clusters) {
  require(formula.tools)
  require(tidyverse)
  require(haven)
  results <- lm_robust(formula = formula,
                       data = data,
                       clusters = clusters) %>%
    tidy() 
  omitted_cat <- model.frame(formula = formula, data = data) %>% 
    select(2:length(.)) %>% 
    sapply(., levels) %>% 
    lapply(., "[[", 1) %>% 
    unlist() %>%
    as.data.frame()
  omitted_cat_df <- cbind(paste0(rownames(omitted_cat), omitted_cat[,1]), 
                          0, 0, 0, 0, 0, 0, 0, 
                          lhs.vars(formula)) %>%
    as.data.frame() %>%
    zap_labels() %>%
    setNames(., names(results))
  results_with_omitted <- rbind(results, 
                                omitted_cat_df)
  results_with_omitted[,c(2:8)] <- sapply(results_with_omitted[,c(2:8)],as.numeric)
  return(results_with_omitted)
}

#### LOAD DATA ####
df <- read_rds("01-yougov-conjoint/01-data/cleaned/yougov-conjoint-cleaned.rds")


#### CODE DATA FOR CAUSAL FORESTS ####

df %>%
  mutate(is_college = ifelse(r_edu %in% c("4-year college","Post-grad"), 1, 0),
         is_white = ifelse(r_race == "White", 1, 0),
         is_democrat = ifelse(r_party %in% c(1,2,3), 1, 0),
         is_republican = ifelse(r_party %in% c(5,6,7),1, 0),
         is_independent = ifelse(r_party == 4, 1, 0),
         is_female = ifelse(r_female == "Female", 1, 0),
         is_union = ifelse(r_union_member == "Union member", 1, 0),
         is_employed_ft = ifelse(r_employment == "Full-time", 1, 0),
         is_employed_pt = ifelse(r_employment == "Part-time", 1, 0),
         is_retired = ifelse(r_employment == "Retired", 1, 0),
         is_unemployed = ifelse(r_employment %in% c("Homemaker","Other",
                                                    "Unemployed","Temporarily laid off",
                                                    "Student","Permanently disabled"), 1, 0),
         is_blue_collar = ifelse(r_industry.blue_white == "Blue-collar", 1, 0),
         is_white_collar = ifelse(r_industry.blue_white == "White-collar", 1, 0),
         cje_cg_worker_shareholders = ifelse(cje_corporate_gov == "Workers are shareholders", 1, 0),
         cje_cg_worker_corporate_board = ifelse(cje_corporate_gov == "Workers sit on the corporate board", 1, 0),
         cje_cg_worker_elect_managers = ifelse(cje_corporate_gov == "Workers elect their managers", 1, 0),
         cje_union_unionshop = ifelse(cje_union == "Unionized", 1, 0)) -> df_grf

#### LIST OF COVARIATES ####

covariates <- c("is_college","is_white","is_democrat","is_republican","is_independent",
                "is_female","is_union","is_employed_ft","is_employed_pt",
                "is_retired","is_unemployed", "is_blue_collar", "is_white_collar", "r_age")




##### generalized random forests (elect-managers versus private ownership) ####

#estimate model
effects_forest_elect_vs_private_work <- run_causal_forest_cjoint(
  df = df_grf,
  outcome = work_binary,
  treatment_cjoint = cje_corporate_gov,
  treatment_binary = cje_cg_worker_elect_managers,
  treatment_comparison = c("Workers elect their managers",
                           "Privately owned by non-worker shareholders"),
  covars = covariates
)
test_calibration(effects_forest_elect_vs_private_work)

#tidy
elect_vs_private_work_td <- tidy_causal_forest(effects_forest_elect_vs_private_work) %>%
  mutate(outcome = "Prefer to Work",
         treatment = "Elect managers vs\nPrivate Ownership Shareholders")

# Estimate the conditional average treatment effect on the full sample (CATE).
elect_vs_private_work_ate <- average_treatment_effect(effects_forest_elect_vs_private_work, 
                                                 target.sample = "all")

#plot individual-level predictions
elect_vs_private_work_td %>%
  drop_na(party_char) %>%
  ggplot(aes(x = id,y = tau.hat)) +
  geom_linerange(aes(ymin = lower.90,ymax = upper.90),color = "indianred",alpha = 0.7) + 
  geom_linerange(aes(ymin = lower.95,ymax = upper.95),color = "indianred",alpha = 0.3) + 
  geom_hline(aes(yintercept = unique(cate))) + #estimated causal effect full sample
  geom_ribbon(aes(ymin = unique(cate) - 1.96*unique(cate_se),
                  ymax = unique(cate) + 1.96*unique(cate_se)),
              alpha = 0.3) + 
  geom_point(size = 1.5,shape = 21,aes(fill = party_char)) + 
  scale_fill_manual(values = c("dodgerblue","white","indianred")) + 
  theme_shom_alt(base_size = 16) + 
  theme(axis.text.y = element_blank()) + 
  labs(x = "",
       y = "Predicted Effect") + 
  geom_hline(yintercept = 0,linetype = "dashed",size = 1.5) + 
  coord_flip()

##### generalized random forests (workers on corporate board versus private ownership) ####

#estimate model
effects_forest_board_vs_private_work <- run_causal_forest_cjoint(
  df = df_grf,
  outcome = work_binary,
  treatment_cjoint = cje_corporate_gov,
  treatment_binary = cje_cg_worker_corporate_board,
  treatment_comparison = c("Workers sit on the corporate board",
                           "Privately owned by non-worker shareholders"),
  covars = covariates
)

#tidy
board_vs_private_work_td <- tidy_causal_forest(effects_forest_board_vs_private_work) %>%
  mutate(outcome = "Prefer to Work",
         treatment = "Workers on Board vs\nPrivate Ownership Shareholders")

# Estimate the conditional average treatment effect on the full sample (CATE).
board_vs_private_work_ate <- average_treatment_effect(effects_forest_board_vs_private_work, 
                                                 target.sample = "all")


#plot individual-level predictions
board_vs_private_work_td %>%
  drop_na(party_char) %>%
  ggplot(aes(x = id,y = tau.hat)) +
  geom_linerange(aes(ymin = lower.90,ymax = upper.90),color = "indianred",alpha = 0.2) + #CIs around point estimates
  geom_linerange(aes(ymin = lower.95,ymax = upper.95),color = "indianred",alpha = 0.1) + 
  geom_hline(aes(yintercept = unique(cate))) + #estimated causal effect full sample
  geom_ribbon(aes(ymin = unique(cate) - 1.96*unique(cate_se),
                  ymax = unique(cate) + 1.96*unique(cate_se)),
              alpha = 0.3) + 
  geom_point(size = 1.5,shape = 21,aes(fill = party_char)) +#estimated unit level causal effect
  scale_fill_manual(values = c("dodgerblue","white","indianred")) + 
  theme_shom_alt(base_size = 16) + 
  theme(axis.text.y = element_blank()) + 
  labs(x = "",
       y = "Predicted Effect") + 
  geom_hline(yintercept = 0,linetype = "dashed",size = 1.5) + 
  coord_flip()

##### generalized random forests (esop versus private ownership) ####

#estimate model
effects_forest_esop_vs_private_work <- run_causal_forest_cjoint(
  df = df_grf,
  outcome = power_binary,
  treatment_cjoint = cje_corporate_gov,
  treatment_binary = cje_cg_worker_shareholders,
  treatment_comparison = c("Workers are shareholders",
                           "Privately owned by non-worker shareholders"),
  covars = covariates
)

#tidy
esop_vs_private_work_td <- tidy_causal_forest(effects_forest_esop_vs_private_work) %>%
  mutate(outcome = "Prefer to Work",
         treatment = "Workers are Shareholders vs\nPrivate Ownership Shareholders")

# Estimate the conditional average treatment effect on the full sample (CATE).
esop_vs_private_work_ate <- average_treatment_effect(effects_forest_esop_vs_private_work, 
                                                 target.sample = "all")


#plot individual-level predictions
esop_vs_private_work_td %>%
  drop_na(party_char) %>%
  ggplot(aes(x = id,y = tau.hat)) +
  geom_linerange(aes(ymin = lower.90,ymax = upper.90),color = "indianred",alpha = 0.2) + #CIs around point estimates
  geom_linerange(aes(ymin = lower.95,ymax = upper.95),color = "indianred",alpha = 0.1) + 
  geom_hline(aes(yintercept = unique(cate))) + #estimated causal effect full sample
  geom_ribbon(aes(ymin = unique(cate) - 1.96*unique(cate_se),
                  ymax = unique(cate) + 1.96*unique(cate_se)),
              alpha = 0.3) + 
  geom_point(size = 1.5,shape = 21,aes(fill = party_char)) +#estimated unit level causal effect
  scale_fill_manual(values = c("dodgerblue","white","indianred")) + 
  theme_shom_alt(base_size = 16) + 
  theme(axis.text.y = element_blank()) + 
  labs(x = "",
       y = "Predicted Effect",
       fill = "Party Identification") + 
  geom_hline(yintercept = 0,linetype = "dashed",size = 1.5) + 
  coord_flip()

#export results
write_csv(bind_rows(esop_vs_private_work_td,elect_vs_private_work_td,
                    board_vs_private_work_td),
          path = "01-yougov-conjoint/01-data/intermediate/hte-forests-work.csv")


